How popular is your name?¶

UK ONS 1996-2021¶

Image

Sarah, Jason, Leon & Conrad¶

IMPORTS¶

In [ ]:
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
from bokeh.models import NumeralTickFormatter
from bokeh.io import output_notebook
output_notebook()

import plotly.express as px
import plotly.io as pio
pio.renderers.default='notebook'

import seaborn as sns
Loading BokehJS ...

Create lists for later data wrangling¶

In [ ]:
#Setup names for our family.
females = ["Sarah"]
males = ["Jason","Leon","Conrad"]
In [ ]:
#Create some lists to select or reformat data later
count_cols = []     # "1996 Count", ... "2021 Count" ** Used to drop/rename columns
rank_cols = []      # "1996 Rank", ... "2021 Rank" ** Used to drop/rename columns
years = []          # "1996", ... "2021" ** Used to wrangle data and rename columns
for f in range(1996,2022):
    count_cols.append(str(f) + ' Count')
    rank_cols.append(str(f) + ' Rank')
    years.append(str(f))
In [ ]:
#Import ONS data
df1 = pd.read_excel("babynames1996to2021.xlsx","1",skiprows =7)     # Boys names in workbook 1
df2 = pd.read_excel( 'babynames1996to2021.xlsx',"2",skiprows =7)    # Girls names in workbook 2
df1 = df1.set_index("Name")
df2 = df2.set_index("Name") 
In [ ]:
#Replace NaN with 0
df1 = df1.replace("[x]",0)
df2 = df2.replace("[x]",0)

Filter our names to new dataframe¶

In [ ]:
#Filter names from ONS data
Girls = df2.loc[females]
Boys = df1.loc[males]
df_family = pd.concat([Girls, Boys], axis=0)
In [ ]:
#Drop rank and count columns
df_family_counts = df_family.drop(columns=rank_cols)
df_family_ranks = df_family.drop(columns=count_cols)
In [ ]:
#Rename columns "1996 Count" -> "1996" and "1996 Rank" -> "1996"
df_family_counts.columns = df_family_counts.columns.str.replace(" Count","")
df_family_ranks.columns = df_family_ranks.columns.str.replace(" Rank","")
In [ ]:
#Pivot DFs so rows are date observations
df_family_counts = pd.pivot_table(df_family_counts, values = years, columns=["Name"])
df_family_counts.index.name = "Year"
df_family_ranks = pd.pivot_table(df_family_ranks, values = years, columns=["Name"])
df_family_ranks.index.name = "Year"

Tables¶

UK ranking for selected names¶

In [ ]:
#Display the rank table for years since children born
df_family_ranks.tail(14)
Out[ ]:
Name Conrad Jason Leon Sarah
Year
2008 489 139 62 72
2009 473 145 60 79
2010 536 161 60 83
2011 576 159 62 90
2012 734 165 72 95
2013 611 155 75 96
2014 714 162 85 95
2015 779 163 85 96
2016 870 170 92 96
2017 861 179 98 103
2018 851 174 97 103
2019 1152 175 93 107
2020 921 213 93 115
2021 1448 210 101 125

UK count for selected names¶

In [ ]:
df_family_counts.tail(14)
Out[ ]:
Name Conrad Jason Leon Sarah
Year
2008 71 404 1138 930
2009 76 406 1155 793
2010 64 344 1204 722
2011 61 360 1069 663
2012 45 359 918 592
2013 56 372 837 574
2014 45 353 765 601
2015 41 355 795 581
2016 36 340 737 572
2017 36 314 669 530
2018 36 311 655 503
2019 23 302 662 479
2020 31 240 620 422
2021 17 251 590 403
In [ ]:
#Create totals DF
df1_totals = df1.drop(columns=rank_cols).sum()
df2_totals = df2.drop(columns=rank_cols).sum()
df1_totals = df1_totals.reset_index()
df2_totals = df2_totals.reset_index()
df_totals = pd.concat([df1_totals, df2_totals], axis=0)
df_totals = df_totals.replace(count_cols,years)
#df_totals = df1_totals + df2_totals
df_totals.columns=["Years","Total"]
df_totals = df_totals.groupby("Years").sum()

Total baby names by year¶

In [ ]:
df_totals.tail(14)
Out[ ]:
Total
Years
2008 655171
2009 651553
2010 667340
2011 667230
2012 670522
2013 642085
2014 638852
2015 641216
2016 639126
2017 621991
2018 600913
2019 583969
2020 557458
2021 569103

Bokeh Charts¶

In [ ]:
source = ColumnDataSource(df_totals)
p = figure(title="Total UK baby names", x_axis_label="Year", y_axis_label="Babies",x_range=years, width=1100, height=500)
p.line(source=source, x="Years",y="Total")
show(p)
In [ ]:
# create a new plot with a title and axis labels
source = ColumnDataSource(df_family_counts)
p = figure(title="UK baby count for selected names", x_axis_label="", y_axis_label="Babies",x_range=years, width=1100, height=500)

# add a line renderer with legend and line thickness
p.line(x = "Year", y = "Jason", legend_label="Jason", line_width=2,source=source)
p.line(x = "Year", y = "Sarah", legend_label="Sarah", line_color="red",line_width=2,source=source)
p.line(x = "Year", y = "Leon", legend_label="Leon", line_color="yellow",line_width=2,source=source)
p.line(x = "Year", y = "Conrad", legend_label="Conrad", line_color="darkgrey",line_width=2,source=source)

# show the results
show(p)
In [ ]:
# create a new plot with a title and axis labels
source = ColumnDataSource(df_family_ranks)
p = figure(title="UK baby name rankings", x_axis_label="Year",y_range=(1500,1), y_axis_label="Name UK rank",x_range=years, width=1100, height=500)

# add a line renderer with legend and line thickness
p.line(x = "Year", y = "Jason", legend_label="Jason", line_width=2,source=source)
p.line(x = "Year", y = "Sarah", legend_label="Sarah", line_color="red",line_width=2,source=source)
p.line(x = "Year", y = "Leon", legend_label="Leon", line_color="yellow",line_width=2,source=source)
p.line(x = "Year", y = "Conrad", legend_label="Conrad", line_color="darkgrey",line_width=2,source=source)

# show the results
show(p)

Matplotlib Charts¶

In [ ]:
df_totals.plot(title="Total UK baby names",figsize=(15,6),grid=True)
Out[ ]:
<AxesSubplot:title={'center':'Total UK baby names'}, xlabel='Years'>
In [ ]:
df_family_counts.plot(title="UK baby count for selected names",figsize=(15,6),grid=True)
Out[ ]:
<AxesSubplot:title={'center':'UK baby count for selected names'}, xlabel='Year'>
In [ ]:
df_family_ranks.plot(title="UK baby name rankings",figsize=(15,6),grid=True)
plt.ylim(1500,1)
Out[ ]:
(1500.0, 1.0)

Seaborn Charts¶

In [ ]:
fig, ax = plt.subplots(figsize=(15, 6))
sns.lineplot(df_totals).set(title="Total UK baby names")
Out[ ]:
[Text(0.5, 1.0, 'Total UK baby names')]
In [ ]:
fig, ax = plt.subplots(figsize=(15, 6))
sns.lineplot(df_family_counts).set(title="UK baby count for selected names")
Out[ ]:
[Text(0.5, 1.0, 'UK baby count for selected names')]
In [ ]:
fig, ax = plt.subplots(figsize=(15,6))
sns.lineplot(df_family_ranks).set(title="UK baby rank for selected names")
plt.ylim(1500,1)
Out[ ]:
(1500.0, 1.0)

Plotly Charts¶

In [ ]:
px.line(df_totals, title = "Total UK baby names")
In [ ]:
px.line(df_family_counts, title="UK baby count for selected names")
In [ ]:
px.line(df_family_ranks, title="UK baby rank for selected names")